TODO
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the battery dataset and split its column names into a text group and a
# numeric group; Battery.ID is left out of both groups.
data <- read.csv("mp_batteries.csv")
columns <- colnames(data)
string_columns <- c(
  "Battery.Formula",
  "Working.Ion",
  "Formula.Charge",
  "Formula.Discharge"
)
# Everything that is neither a text column nor the ID is treated as numeric.
numeric_columns <- columns[!(columns %in% c(string_columns, "Battery.ID"))]
# Row count, then per-column summary statistics.
nrow(data)
## [1] 4351
summary(data)
## Battery.ID Battery.Formula Working.Ion Formula.Charge
## Length:4351 Length:4351 Length:4351 Length:4351
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Formula.Discharge Max.Delta.Volume Average.Voltage Gravimetric.Capacity
## Length:4351 Min. : 0.00002 Min. :-7.755 Min. : 5.176
## Class :character 1st Qu.: 0.01747 1st Qu.: 2.226 1st Qu.: 88.108
## Mode :character Median : 0.04203 Median : 3.301 Median : 130.691
## Mean : 0.37531 Mean : 3.083 Mean : 158.291
## 3rd Qu.: 0.08595 3rd Qu.: 4.019 3rd Qu.: 187.600
## Max. :293.19322 Max. :54.569 Max. :2557.627
## Volumetric.Capacity Gravimetric.Energy Volumetric.Energy
## Min. : 24.08 Min. :-583.5 Min. :-2208.1
## 1st Qu.: 311.62 1st Qu.: 211.7 1st Qu.: 821.6
## Median : 507.03 Median : 401.8 Median : 1463.8
## Mean : 610.62 Mean : 444.1 Mean : 1664.0
## 3rd Qu.: 722.75 3rd Qu.: 614.4 3rd Qu.: 2252.3
## Max. :7619.19 Max. :5926.9 Max. :18305.9
## Atomic.Fraction.Charge Atomic.Fraction.Discharge Stability.Charge
## Min. :0.00000 Min. :0.007407 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.086957 1st Qu.:0.03301
## Median :0.00000 Median :0.142857 Median :0.07319
## Mean :0.03986 Mean :0.159077 Mean :0.14257
## 3rd Qu.:0.04762 3rd Qu.:0.200000 3rd Qu.:0.13160
## Max. :0.90909 Max. :0.993333 Max. :6.48710
## Stability.Discharge Steps Max.Voltage.Step
## Min. :0.00000 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.01952 1st Qu.:1.000 1st Qu.: 0.0000
## Median :0.04878 Median :1.000 Median : 0.0000
## Mean :0.12207 Mean :1.167 Mean : 0.1503
## 3rd Qu.:0.09299 3rd Qu.:1.000 3rd Qu.: 0.0000
## Max. :6.27781 Max. :6.000 Max. :26.9607
# Frequency tables for the text columns.
# For every column in `string_columns` the distinct values are counted and
# sorted by descending frequency; the ten most frequent values are printed and
# the complete table is kept. All tables are stacked into `counts`, whose `var`
# column records which source column each row came from.
# The tables are collected in a preallocated list and bound once, instead of
# growing `counts` with rbind() on every iteration.
count_tables <- vector("list", length(string_columns))
for (i in seq_along(string_columns)) {
  col <- string_columns[[i]]
  # get(col) looks the column up by name; the value column is therefore
  # literally called "get(col)" in every table, which keeps the headers equal
  # so the tables can be stacked.
  col_counts <- data %>%
    count(get(col), name = "Count") %>%
    arrange(desc(Count))
  col_counts$var <- col
  count_tables[[i]] <- col_counts
  print(head(col_counts, 10))
}
counts <- bind_rows(count_tables)
## get(col) Count var
## 1 Li0-1V2OF5 19 Battery.Formula
## 2 Li0-1CoPO4 18 Battery.Formula
## 3 Li0-1FePO4 18 Battery.Formula
## 4 Li0-3MnFeCo(PO4)3 17 Battery.Formula
## 5 Li0-1MnPO4 15 Battery.Formula
## 6 Li0-1V4OF11 15 Battery.Formula
## 7 Li0-1V4O5F7 12 Battery.Formula
## 8 Li0-1VF5 12 Battery.Formula
## 9 Li0-1CrP2O7 11 Battery.Formula
## 10 Li0-2MnP2O7 11 Battery.Formula
## get(col) Count var
## 1 Li 2440 Working.Ion
## 2 Ca 435 Working.Ion
## 3 Mg 423 Working.Ion
## 4 Zn 366 Working.Ion
## 5 Na 309 Working.Ion
## 6 K 107 Working.Ion
## 7 Al 95 Working.Ion
## 8 Y 93 Working.Ion
## 9 Rb 50 Working.Ion
## 10 Cs 33 Working.Ion
## get(col) Count var
## 1 MnO2 49 Formula.Charge
## 2 TiO2 47 Formula.Charge
## 3 VO2 46 Formula.Charge
## 4 CrO2 45 Formula.Charge
## 5 CoO2 43 Formula.Charge
## 6 NiO2 41 Formula.Charge
## 7 FeO2 36 Formula.Charge
## 8 FePO4 26 Formula.Charge
## 9 WO2 25 Formula.Charge
## 10 CoPO4 24 Formula.Charge
## get(col) Count var
## 1 LiCoPO4 19 Formula.Discharge
## 2 LiFePO4 19 Formula.Discharge
## 3 LiMnPO4 19 Formula.Discharge
## 4 LiV2OF5 19 Formula.Discharge
## 5 Li5Mn6(BO3)6 18 Formula.Discharge
## 6 Li3MnFeCo(PO4)3 17 Formula.Discharge
## 7 LiV4OF11 15 Formula.Discharge
## 8 Li2MnP2O7 14 Formula.Discharge
## 9 Li2FeSiO4 13 Formula.Discharge
## 10 LiCrPO4 12 Formula.Discharge
# One facet per text column: histogram of how many distinct values occur a
# given number of times (bin width 1, independent axis scales per facet).
ggplot(data = counts, mapping = aes(x = Count)) +
  geom_histogram(alpha = 0.7, fill = "green", binwidth = 1) +
  facet_wrap(~var, scales = "free") +
  labs(
    x = "Liczba wystąpień",
    y = "Liczba różnych wartości",
    title = "Liczba wystąpień wartości dla zmiennej"
  ) +
  theme_minimal()
# Distribution of every numeric variable, one facet per variable.
# drop = FALSE keeps a data frame even when `numeric_columns` has length one.
numeric_df <- data[, numeric_columns, drop = FALSE]
# Long format: one row per (variable, observation), using the pivot_longer()
# default output columns `name` and `value`.
numeric_df_long <- numeric_df %>%
  pivot_longer(cols = everything()) %>%
  as.data.frame()
ggplot(numeric_df_long, aes(x = value)) +
  # bins is given explicitly (30 is the value stat_bin() falls back to), so
  # the plot is unchanged but the "Pick better value" message is not emitted.
  geom_histogram(bins = 30, fill = "green", alpha = 0.7) +
  facet_wrap(~name, scales = "free") +
  theme_minimal()
# Pairwise correlations between all numeric variables (cor() defaults).
numeric_df <- data[, numeric_columns]
correlation_matrix <- cor(numeric_df)
# Flatten the square matrix to long form: one row per ordered pair (x, y)
# together with its correlation coefficient.
correlation_df <- setNames(
  as.data.frame(as.table(correlation_matrix)),
  c("x", "y", "cor")
)
# Keep one row per unordered pair by requiring x to sort before y as text;
# this also removes the diagonal, where x equals y.
x_before_y <- as.character(correlation_df$x) < as.character(correlation_df$y)
correlation_df_one_dir <- correlation_df[x_before_y, ]
Korelacja wszystkich par zmiennych numerycznych
# Print every variable pair, strongest absolute correlation first.
print(
  correlation_df_one_dir[
    with(correlation_df_one_dir, order(-abs(cor))),
  ]
)
## x y cor
## 65 Gravimetric.Energy Volumetric.Energy 0.928325316
## 39 Gravimetric.Capacity Volumetric.Capacity 0.858416267
## 117 Stability.Charge Stability.Discharge 0.802870095
## 32 Atomic.Fraction.Discharge Gravimetric.Capacity 0.680771641
## 50 Average.Voltage Gravimetric.Energy 0.665652274
## 44 Atomic.Fraction.Discharge Volumetric.Capacity 0.618018605
## 91 Atomic.Fraction.Charge Atomic.Fraction.Discharge 0.597415747
## 62 Average.Voltage Volumetric.Energy 0.554519056
## 132 Max.Voltage.Step Steps 0.535253881
## 3 Gravimetric.Capacity Max.Delta.Volume 0.433773349
## 137 Gravimetric.Energy Max.Voltage.Step 0.329232198
## 64 Volumetric.Capacity Volumetric.Energy 0.325748151
## 125 Gravimetric.Energy Steps 0.294607452
## 8 Atomic.Fraction.Discharge Max.Delta.Volume 0.290692092
## 72 Max.Voltage.Step Volumetric.Energy 0.252662508
## 37 Max.Delta.Volume Volumetric.Capacity 0.242476920
## 71 Steps Volumetric.Energy 0.238142024
## 63 Gravimetric.Capacity Volumetric.Energy 0.230421553
## 51 Gravimetric.Capacity Gravimetric.Energy 0.213246271
## 38 Average.Voltage Volumetric.Capacity -0.212817820
## 41 Gravimetric.Energy Volumetric.Capacity 0.209840583
## 69 Stability.Charge Volumetric.Energy 0.178327117
## 20 Atomic.Fraction.Discharge Average.Voltage -0.171690301
## 101 Gravimetric.Energy Stability.Charge 0.166981905
## 98 Average.Voltage Stability.Charge 0.166137144
## 128 Atomic.Fraction.Discharge Steps 0.164171296
## 67 Atomic.Fraction.Charge Volumetric.Energy -0.147352294
## 26 Average.Voltage Gravimetric.Capacity -0.146222183
## 123 Gravimetric.Capacity Steps 0.133397655
## 31 Atomic.Fraction.Charge Gravimetric.Capacity 0.128921011
## 110 Average.Voltage Stability.Discharge -0.128456834
## 134 Average.Voltage Max.Voltage.Step 0.127120819
## 47 Steps Volumetric.Capacity 0.103705117
## 140 Atomic.Fraction.Discharge Max.Voltage.Step 0.101979645
## 45 Stability.Charge Volumetric.Capacity 0.101530488
## 55 Atomic.Fraction.Charge Gravimetric.Energy -0.097292412
## 135 Gravimetric.Capacity Max.Voltage.Step 0.095190645
## 108 Max.Voltage.Step Stability.Charge 0.094046568
## 2 Average.Voltage Max.Delta.Volume -0.082370729
## 113 Gravimetric.Energy Stability.Discharge -0.078260883
## 56 Atomic.Fraction.Discharge Gravimetric.Energy 0.064524784
## 99 Gravimetric.Capacity Stability.Charge 0.063387053
## 130 Stability.Discharge Steps -0.063168645
## 122 Average.Voltage Steps 0.062785083
## 48 Max.Voltage.Step Volumetric.Capacity 0.062608533
## 68 Atomic.Fraction.Discharge Volumetric.Energy 0.061058647
## 5 Gravimetric.Energy Max.Delta.Volume -0.060985786
## 70 Stability.Discharge Volumetric.Energy -0.059994876
## 61 Max.Delta.Volume Volumetric.Energy -0.058832142
## 115 Atomic.Fraction.Charge Stability.Discharge -0.052397088
## 19 Atomic.Fraction.Charge Average.Voltage -0.038555643
## 129 Stability.Charge Steps -0.037485986
## 97 Max.Delta.Volume Stability.Charge 0.033758650
## 104 Atomic.Fraction.Discharge Stability.Charge 0.032405104
## 46 Stability.Discharge Volumetric.Capacity 0.031701213
## 127 Atomic.Fraction.Charge Steps 0.029736912
## 103 Atomic.Fraction.Charge Stability.Charge -0.027357138
## 7 Atomic.Fraction.Charge Max.Delta.Volume 0.021315304
## 120 Max.Voltage.Step Stability.Discharge -0.016555176
## 116 Atomic.Fraction.Discharge Stability.Discharge 0.014320385
## 121 Max.Delta.Volume Steps -0.013258214
## 111 Gravimetric.Capacity Stability.Discharge 0.012538984
## 133 Max.Delta.Volume Max.Voltage.Step -0.009925145
## 109 Max.Delta.Volume Stability.Discharge 0.007735655
## 139 Atomic.Fraction.Charge Max.Voltage.Step 0.005342003
## 43 Atomic.Fraction.Charge Volumetric.Capacity 0.001245630
# Heatmap of the absolute pairwise correlations (white = 0, green = strong).
# The `text` aesthetic is not used by geom_tile() itself (hence the "unknown
# aesthetics" warning); it only supplies the tooltip for ggplotly().
p <- ggplot(correlation_df) +
  geom_tile(aes(x = x, y = y, fill = abs(cor), text = paste("Korelacja pomiędzy", x, "i", y, "=", abs(cor)))) +
  labs(fill = "Korelacja") +
  scale_fill_gradient(low = "white", high = "green") +
  # theme_minimal() is a complete theme and replaces any theme() settings
  # added before it, so the axis-title override must come afterwards.
  theme_minimal() +
  theme(axis.title = element_blank())
## Warning in geom_tile(aes(x = x, y = y, fill = abs(cor), text = paste("Korelacja
## pomiędzy", : Ignoring unknown aesthetics: text
# Interactive version of the heatmap: show only the custom tooltip text,
# blank both axis titles and rotate the x tick labels by 45 degrees.
layout(
  ggplotly(p, tooltip = "text"),
  xaxis = list(title = "", tickangle = 45),
  yaxis = list(title = "")
)
Przedstawienie zależności 5 par zmiennych o najwyższej korelacji
# The five variable pairs with the largest absolute correlation.
strongest_first <- order(-abs(correlation_df_one_dir$cor))
top_5_correlation <- slice(correlation_df_one_dir[strongest_first, ], 1:5)
print(top_5_correlation)
## x y cor
## 1 Gravimetric.Energy Volumetric.Energy 0.9283253
## 2 Gravimetric.Capacity Volumetric.Capacity 0.8584163
## 3 Stability.Charge Stability.Discharge 0.8028701
## 4 Atomic.Fraction.Discharge Gravimetric.Capacity 0.6807716
## 5 Average.Voltage Gravimetric.Energy 0.6656523
# Interactive scatter plot of Gravimetric.Energy against Volumetric.Energy
# with a linear-model trend line. The `text` aesthetic is ignored by
# geom_point() and only provides the hover tooltip shown by ggplotly().
ggplotly(
  ggplot(data, aes(x = Gravimetric.Energy, y = Volumetric.Energy)) +
    geom_point(aes(
      x = Gravimetric.Energy,
      y = Volumetric.Energy,
      text = paste(
        "ID baterii:", Battery.ID,
        "\nGravimetric.Energy:", Gravimetric.Energy,
        # Colon added: this label was the only tooltip line without one.
        "\nVolumetric.Energy:", Volumetric.Energy
      )
    )) +
    geom_smooth(method = "lm") +
    labs(title = "Gravimetric.Energy i Volumetric.Energy") +
    theme_minimal(),
  tooltip = "text"
)
## Warning in geom_point(aes(x = Gravimetric.Energy, y = Volumetric.Energy, :
## Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'
# Interactive scatter plot of Gravimetric.Capacity against Volumetric.Capacity
# with a linear-model trend line; `text` only feeds the ggplotly() tooltip.
capacity_scatter <- ggplot(
  data,
  aes(x = Gravimetric.Capacity, y = Volumetric.Capacity)
) +
  geom_point(aes(x = Gravimetric.Capacity, y = Volumetric.Capacity, text = paste(
    "ID baterii:", Battery.ID,
    "\nGravimetric.Capacity:", Gravimetric.Capacity,
    "\nVolumetric.Capacity:", Volumetric.Capacity
  ))) +
  geom_smooth(method = "lm") +
  theme_minimal() +
  labs(title = "Gravimetric.Capacity i Volumetric.Capacity")
ggplotly(capacity_scatter, tooltip = "text")
## Warning in geom_point(aes(x = Gravimetric.Capacity, y = Volumetric.Capacity, :
## Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'
# Interactive scatter plot of Stability.Charge against Stability.Discharge
# with a linear-model trend line; `text` only feeds the ggplotly() tooltip.
stability_scatter <- ggplot(
  data,
  aes(x = Stability.Charge, y = Stability.Discharge)
) +
  geom_point(aes(x = Stability.Charge, y = Stability.Discharge, text = paste(
    "ID baterii:", Battery.ID,
    "\nStability.Charge:", Stability.Charge,
    "\nStability.Discharge:", Stability.Discharge
  ))) +
  geom_smooth(method = "lm") +
  theme_minimal() +
  labs(title = "Stability.Charge i Stability.Discharge")
ggplotly(stability_scatter, tooltip = "text")
## Warning in geom_point(aes(x = Stability.Charge, y = Stability.Discharge, :
## Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'
# Interactive scatter plot of Atomic.Fraction.Discharge against
# Gravimetric.Capacity with a linear-model trend line; `text` only feeds the
# ggplotly() tooltip.
fraction_capacity_scatter <- ggplot(
  data,
  aes(x = Atomic.Fraction.Discharge, y = Gravimetric.Capacity)
) +
  geom_point(aes(x = Atomic.Fraction.Discharge, y = Gravimetric.Capacity, text = paste(
    "ID baterii:", Battery.ID,
    "\nAtomic.Fraction.Discharge:", Atomic.Fraction.Discharge,
    "\nGravimetric.Capacity:", Gravimetric.Capacity
  ))) +
  geom_smooth(method = "lm") +
  theme_minimal() +
  labs(title = "Atomic.Fraction.Discharge i Gravimetric.Capacity")
ggplotly(fraction_capacity_scatter, tooltip = "text")
## Warning in geom_point(aes(x = Atomic.Fraction.Discharge, y =
## Gravimetric.Capacity, : Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'
# Interactive scatter plot of Average.Voltage against Gravimetric.Energy
# with a linear-model trend line; `text` only feeds the ggplotly() tooltip.
voltage_energy_scatter <- ggplot(
  data,
  aes(x = Average.Voltage, y = Gravimetric.Energy)
) +
  geom_point(aes(x = Average.Voltage, y = Gravimetric.Energy, text = paste(
    "ID baterii:", Battery.ID,
    "\nAverage.Voltage:", Average.Voltage,
    "\nGravimetric.Energy:", Gravimetric.Energy
  ))) +
  geom_smooth(method = "lm") +
  theme_minimal() +
  labs(title = "Average.Voltage i Gravimetric.Energy")
ggplotly(voltage_energy_scatter, tooltip = "text")
## Warning in geom_point(aes(x = Average.Voltage, y = Gravimetric.Energy, text =
## paste("ID baterii:", : Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'